library(tidyverse)
Warning messages:
1: In system("timedatectl", intern = TRUE) :
  running command 'timedatectl' had status 1
2: In readChar(file, size, TRUE) : truncating string with embedded nuls

setup for GO term analysis

The LMM was run by Natsuhiko, see /nfs/team205/nk5/Team205/ed6/DE/run.R

ct_df <- read_tsv(list.files('/nfs/team205/nk5/Team205/ed6/DE/', pattern = "^cell", full.names = TRUE), col_names = FALSE) %>%
  rename(ID=X1, celltype=X2)

── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  X1 = col_double(),
  X2 = col_character()
)

Read results

Table provided by Natsuhiko

de_genes <- read_csv("~/mount/gdrive/Pan_fetal/significant_genes/LMM_Natsuhiko_results/de_lymphoid_ltsr0.9.csv")

── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  celltype = col_character(),
  `organ (in which the gene is up/down-regulated)` = col_character(),
  gene = col_character(),
  `ltsr (posterior prob of DE)` = col_double(),
  logFoldChange = col_double(),
  BM = col_double(),
  GU = col_double(),
  KI = col_double(),
  LI = col_double(),
  MLN = col_double(),
  SK = col_double(),
  SP = col_double(),
  TH = col_double(),
  YS = col_double()
)
colnames(de_genes) <- str_remove(colnames(de_genes), " .+")
de_genes

Save number of cells x celltype and organ

n_cells <- read_csv('/nfs/team205/nk5/Team205/ed6/metadata.csv.gz') %>%
  group_by(anno_lvl_2, organ) %>%
  summarise(n_cells=n()) %>%
  rename(celltype=anno_lvl_2)

── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  index = col_character(),
  Sample = col_character(),
  donor = col_character(),
  organ = col_character(),
  anno_lvl_2 = col_character(),
  age = col_double(),
  method = col_character()
)

`summarise()` has grouped output by 'anno_lvl_2'. You can override using the `.groups` argument.
n_cells %>%
  filter(str_detect(celltype, "ELP"))
organs <- unique(de_genes$organ)

de_genes %>%
  # filter(celltype=="MATURE_B" & organ=="YS") %>%
  # pivot_longer(cols=organs, names_to="organ_expr", values_to="expr") %>%
  mutate(rank=rank(ltsr)) %>%
  ggplot(aes(rank, ltsr)) + 
  geom_point() 

General stats

How many genes are DE in multiple celltypes per organ?

de_genes %>%
  distinct(organ, celltype, gene) %>%
  group_by(organ, gene) %>%
  summarise(n=n()) %>%
  ggplot(aes(n)) + geom_histogram() +
  facet_wrap(organ~., scales="free")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.

How many genes are DE in multiple organs per celltype?

de_genes %>%
  distinct(organ, celltype, gene) %>%
  group_by(celltype, gene) %>%
  summarise(n=n()) %>%
  ggplot(aes(n)) + geom_histogram() 
`summarise()` has grouped output by 'celltype'. You can override using the `.groups` argument.

Number of DE genes per pair

de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  ggplot(aes(organ, celltype, fill=n_de_genes)) +
  geom_tile() +
  scale_fill_viridis_c()

de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  ggplot(aes(celltype, n_de_genes)) +
  geom_col() +
  coord_flip() +
  theme_bw(base_size = 14) +
  facet_wrap(organ~., scales="free_x")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.

de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  ggplot(aes(celltype, n_de_genes, fill=organ)) +
  geom_col() +
  coord_flip() +
  theme_bw(base_size = 14) +
  scale_color_brewer(palette="Spectral") +
  scale_fill_brewer(palette="Spectral")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.

de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  left_join(n_cells) %>%
  ggplot(aes(log10(n_cells), n_de_genes, color=organ)) +
  geom_point() +
  scale_color_brewer(palette="Spectral")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.
Joining, by = c("organ", "celltype")

Overlap between celltype specific genes

for (o in organs){
  p_df <- de_genes %>%
    group_by(gene, organ) %>%
    mutate(n_celltype=n()) %>%
    ungroup() %>%
    filter(organ == o) %>%
    filter(n_celltype > 3) %>%
    # filter(abs(logFoldChange) > 0.1) %>%
    rename(org_expr = o) %>%
    arrange(- org_expr) %>%
    mutate(gene=factor(gene, levels=unique(gene))) %>%
    left_join(ct_class_df) %>%
    mutate(celltype=factor(celltype, levels=unlist(ct_order))) 
  n_genes <- length(unique(p_df$gene))
  pl_height <- ifelse(n_genes > 10, 0.3*n_genes, 1*n_genes)
  p <- p_df %>%
    ggplot(aes(celltype, gene)) +
    facet_grid(organ~class, scales="free_x", space="free") +
    geom_point(aes(fill=org_expr, size=abs(org_expr)), shape=21, color='black') +
    scale_fill_gradient2(high="red", low="blue") +
    scale_color_gradient2(high="red", low="blue") +
    theme_gray(base_size=14) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) 
  print(p) 
}
Joining, by = "celltype"
Error: Faceting variables must have at least one value
Run `rlang::last_error()` to see where the error occurred.

o = "TH"

p_df <- de_genes %>%
    group_by(gene, organ) %>%
    mutate(n_celltype=n()) %>%
    ungroup() %>%
    filter(organ == o) %>%
    filter(n_celltype > 5) %>%
    # filter(abs(logFoldChange) > 0.1) %>%
    rename(org_expr = o) %>%
    arrange(- org_expr) %>%
    mutate(gene=factor(gene, levels=unique(gene))) %>%
    left_join(ct_class_df) %>%
    mutate(celltype=factor(celltype, levels=unlist(ct_order))) 
Joining, by = "celltype"
n_genes <- length(unique(p_df$gene))

p_top <- n_cells[n_cells$organ==o,] %>%
    mutate(organ=factor(organ, levels=organs)) %>%
    filter(celltype %in% p_df$celltype) %>%
    left_join(ct_class_df) %>%
    mutate(celltype=factor(celltype, levels=unlist(ct_order)))  %>%
    ggplot(aes(celltype, log10(n_cells))) +
      geom_col() +
    theme_bw(base_size=14) +
  facet_grid(organ~class, scales="free_x", space="free") +
  theme(axis.text.x=element_blank()) 
Joining, by = "celltype"
p <- p_df %>%
    ggplot(aes(celltype, gene)) +
    facet_grid(organ~class, scales="free_x", space="free") +
    geom_point(aes(fill=org_expr, size=abs(org_expr)), shape=21, color='black') +
    scale_fill_gradient2(high="red", low="blue") +
    scale_color_gradient2(high="red", low="blue") +
    theme_gray(base_size=14) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) 

(p_top / p)  + plot_layout(heights=c(1,5))

NA
NA
plot_celltype_org_de(de_genes, org = "BM", ct="ELP/PRE_PRO_B", minFC = 0.5)

plot_celltype_org_de(de_genes, org = "LI", ct="ELP/PRE_PRO_B", minFC = 0.5)

plot_celltype_org_de(de_genes, org = "TH", ct="Progenitor", minFC = 1)

plot_celltype_org_de(de_genes, org = "TH", ct="DN(P)/DN(early)")

plot_celltype_org_de(de_genes, org = "TH", ct="B1")

plot_celltype_org_de(de_genes, org = "LI", ct="B1")

plot_celltype_org_de(de_genes, org = "SP", ct="B1")

plot_celltype_org_de(de_genes, org = "BM", ct="B1")

plot_celltype_org_de(de_genes, org = "SP", ct="NKT")

plot_celltype_org_de(de_genes, org = "SP", ct="NK")

Visualize results

de_genes %>%
  filter(celltype=="CD8aa") %>%
  pivot_longer(cols = organs, names_to="expr_organs", values_to="expr") %>%
  group_by(organ, expr_organs) %>%
  summarise(signature=mean(abs(expr))) %>%
  ggplot(aes(expr_organs, signature, color=organ)) + geom_point()
  
plot_celltype_org_de(de_genes, "MATURE_B", "YS") +
  coord_fixed()
Note: Using an external vector in selections is ambiguous.
ℹ Use `all_of(organs)` instead of `organs` to silence this message.
ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.

plot_celltype_org_de(de_genes, "CD4+T", "MLN", minFC = 0.3) +
  coord_fixed() +
plot_celltype_org_de(de_genes, "CD8+T", "MLN", minFC = 0.3) +
  coord_fixed() 
  
plot_celltype_org_de(de_genes, "CD4+T", "SK", minFC = 0.2) +
  coord_fixed() +
plot_celltype_org_de(de_genes, "CD4+T", "GU", minFC = 0.2) +
  coord_fixed() +
plot_celltype_org_de(de_genes, "CD4+T", "KI", minFC = 0.2) +
  coord_fixed() 
plot_celltype_org_de(de_genes, "CD8+T", "SP", minFC = 0.2) +
  coord_fixed() 
plot_celltype_org_de(de_genes, "MATURE_B", "TH", minFC = 0.8) +
  coord_fixed()

Read all results

dim(de_organ$beta)
[1] 12517     9
org <- "LI"
ct <- "MATURE_B"

ct_id <- ct_df$ID[ct_df$celltype==ct]

de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
signif_genes <- de_genes$gene[de_genes$organ==org & de_genes$celltype==ct]

data.frame(ct=de_organ$beta[,org], ct_org = de_ct_org$beta[,glue("{ct}:{org}")], gene=rownames(de_celltype$beta)) %>%
  mutate(is_signif = gene %in% signif_genes) %>%
  ggplot(aes(ct, ct_org)) +
  geom_point() +
  geom_point(data= . %>% filter(is_signif), color="red") +
  ggrepel::geom_text_repel(data= . %>% filter(is_signif), color="red", aes(label=gene)) +
  xlab(org) + ylab(glue("{ct}:{org}"))

VS marker genes

org1 <- "BM"
org2 <- "LI"
ct <- "PRO_B"

ct_id <- ct_df$ID[ct_df$celltype==ct]

de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
signif_genes <- de_genes$gene[de_genes$organ==org & de_genes$celltype==ct]

data.frame(ct=de_ct_org$beta[,glue("{ct}:{org1}")], ct_org = de_ct_org$beta[,glue("{ct}:{org2}")], gene=rownames(de_celltype$beta)) %>%
  mutate(is_signif = gene %in% signif_genes) %>%
  ggplot(aes(ct, ct_org)) +
  geom_point() +
  geom_point(data= . %>% filter(is_signif), color="red") +
  geom_text(data= . %>% filter(is_signif), color="red", aes(label=gene)) +
  xlab(glue("{ct}:{org1}")) + ylab(glue("{ct}:{org2}"))

library(Hmisc)

for (ct in ct_df$celltype){
  ct_id <- ct_df$ID[ct_df$celltype==ct]
  de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
  
  cormat <- rcorr(de_ct_org$beta)  
  cormat$r[cormat$P > 0.01] <- 0
  corrplot::corrplot(cormat$r,method = 'color' , diag = TRUE, addCoef.col = "grey", hclust.method = "ward")
  
}

de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  ggplot(aes(organ, n_de_genes)) +
  geom_col() +
  coord_flip() +
  theme_bw(base_size = 14) +
  facet_wrap(celltype~., scales="free_x")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.

Compare correlation between coefficients and signatures

i.e. cells where the gene set is expressed

cor(sig_mat[1:100,1:10])
                        sig_score_TH_Progenitor sig_score_TH_abT(entry) sig_score_BM_B1 sig_score_GU_B1 sig_score_LI_B1 sig_score_SP_B1
sig_score_TH_Progenitor              1.00000000               0.2607735     -0.03180426    -0.049933182     -0.38547715    -0.142414491
sig_score_TH_abT(entry)              0.26077353               1.0000000      0.12534175    -0.179732663     -0.24388387     0.147321331
sig_score_BM_B1                     -0.03180426               0.1253418      1.00000000    -0.503860150     -0.08516963     0.035257081
sig_score_GU_B1                     -0.04993318              -0.1797327     -0.50386015     1.000000000      0.29912966    -0.003256735
sig_score_LI_B1                     -0.38547715              -0.2438839     -0.08516963     0.299129659      1.00000000     0.325463255
sig_score_SP_B1                     -0.14241449               0.1473213      0.03525708    -0.003256735      0.32546326     1.000000000
sig_score_TH_B1                     -0.03911572              -0.1111063     -0.64250985     0.783402416      0.18909408    -0.003337539
sig_score_BM_CD4+T                   0.12708686              -0.2959110     -0.21383857     0.270087050      0.30057736    -0.082727222
sig_score_LI_CD4+T                  -0.32082456              -0.3293925     -0.45615206     0.656951830      0.54229874     0.222532475
sig_score_MLN_CD4+T                 -0.21302739              -0.3044921     -0.18037049     0.048376133      0.06461616     0.165906852
                        sig_score_TH_B1 sig_score_BM_CD4+T sig_score_LI_CD4+T sig_score_MLN_CD4+T
sig_score_TH_Progenitor    -0.039115717         0.12708686         -0.3208246         -0.21302739
sig_score_TH_abT(entry)    -0.111106349        -0.29591101         -0.3293925         -0.30449214
sig_score_BM_B1            -0.642509845        -0.21383857         -0.4561521         -0.18037049
sig_score_GU_B1             0.783402416         0.27008705          0.6569518          0.04837613
sig_score_LI_B1             0.189094076         0.30057736          0.5422987          0.06461616
sig_score_SP_B1            -0.003337539        -0.08272722          0.2225325          0.16590685
sig_score_TH_B1             1.000000000         0.09536250          0.6250180          0.03827978
sig_score_BM_CD4+T          0.095362501         1.00000000          0.1790338          0.07268378
sig_score_LI_CD4+T          0.625018029         0.17903375          1.0000000          0.17857203
sig_score_MLN_CD4+T         0.038279779         0.07268378          0.1785720          1.00000000

long_cor <- left_join(long_cor_sig, long_cor_beta) 
Joining, by = c("Var1", "Var2")

---
title: "Pan Fetal Immune - DE results by Natsuhiko"
output: html_notebook
---

```{r}
library(tidyverse)
```

setup for GO term analysis

```{r, echo=FALSE, warning=FALSE, message=FALSE}
# BiocManager::install('clusterProfiler')
# BiocManager::install('msigdbr')
library(clusterProfiler)
library(msigdbr)

m_df <- msigdbr(species = "Homo sapiens")
m_t2g <- msigdbr(species = "Homo sapiens", category = "C5", subcategory = "BP")  %>% 
  dplyr::select(gs_name, gene_symbol)
```


The LMM was run by Natsuhiko, see `/nfs/team205/nk5/Team205/ed6/DE/run.R`

```{r}
outfiles <- list.files('/nfs/team205/nk5/Team205/ed6/DE', pattern = "de", full.names = TRUE)

ct_df <- read_tsv(list.files('/nfs/team205/nk5/Team205/ed6/DE/', pattern = "^cell", full.names = TRUE), col_names = FALSE) %>%
  rename(ID=X1, celltype=X2)

## All the genes
gene = read.csv("/nfs/team205/nk5/Team205/ed6/gene.csv.gz",as.is=T)
"HGH1" %in% gene$GeneName
```

## Read results

Table provided by Natsuhiko

```{r}
de_genes <- read_csv("~/mount/gdrive/Pan_fetal/significant_genes/LMM_Natsuhiko_results/de_lymphoid_ltsr0.9.csv")
colnames(de_genes) <- str_remove(colnames(de_genes), " .+")
de_genes
```

Save number of cells x celltype and organ

```{r}
n_cells <- read_csv('/nfs/team205/nk5/Team205/ed6/metadata.csv.gz') %>%
  group_by(anno_lvl_2, organ) %>%
  summarise(n_cells=n()) %>%
  rename(celltype=anno_lvl_2)
n_cells %>%
  filter(str_detect(celltype, "ELP"))
```


```{r}
organs <- unique(de_genes$organ)

de_genes %>%
  # filter(celltype=="MATURE_B" & organ=="YS") %>%
  # pivot_longer(cols=organs, names_to="organ_expr", values_to="expr") %>%
  mutate(rank=rank(ltsr)) %>%
  ggplot(aes(rank, ltsr)) + 
  geom_point() 
```

## General stats

How many genes are DE in multiple celltypes per organ?
```{r}
de_genes %>%
  distinct(organ, celltype, gene) %>%
  group_by(organ, gene) %>%
  summarise(n=n()) %>%
  ggplot(aes(n)) + geom_histogram() +
  facet_wrap(organ~., scales="free")

```

How many genes are DE in multiple organs per celltype?
```{r}
de_genes %>%
  distinct(organ, celltype, gene) %>%
  group_by(celltype, gene) %>%
  summarise(n=n()) %>%
  ggplot(aes(n)) + geom_histogram() 

```

### Number of DE genes per pair

```{r, message=FALSE}
de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  ggplot(aes(organ, celltype, fill=n_de_genes)) +
  geom_tile() +
  scale_fill_viridis_c()
```
```{r, fig.height=11, fig.width=11}
de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  ggplot(aes(celltype, n_de_genes)) +
  geom_col() +
  coord_flip() +
  theme_bw(base_size = 14) +
  facet_wrap(organ~., scales="free_x")
```

```{r, fig.height=12, fig.width=7}
de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  ggplot(aes(celltype, n_de_genes, fill=organ)) +
  geom_col() +
  coord_flip() +
  theme_bw(base_size = 14) +
  scale_color_brewer(palette="Spectral") +
  scale_fill_brewer(palette="Spectral")
  
# facet_wrap(organ~., scales="free_x")
```
```{r}
de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  left_join(n_cells) %>%
  ggplot(aes(log10(n_cells), n_de_genes, color=organ)) +
  geom_point() +
  scale_color_brewer(palette="Spectral")
   
```

Overlap between celltype specific genes
```{r, fig.width=10, fig.height=15}
ct_order <- list(progenitors=c("Progenitor", "MPP"),
                 B_cell = c("ELP/PRE_PRO_B", "EARLY_PRO_B","PRO_B", "LATE_PRO_B",'PRO_TO_PRE_B', "LARGE_PRE_B", "SMALL_PRE_B", "IMMATURE_B", "MATURE_B",'CYCLING_MATURE_B', "B1", "PLASMA_B"),
                 ILC = c("ILC"),
                 T_cell = c("DN(P)/DN(early)", "DN(Q)", "DP(P)", "DP(Q)", "abT(entry)", "CD4+T", "CD8+T", 'CD8aa',"Treg","NKT"),
                 NK = c("NK"))

ct_class_df <- imap(ct_order, ~ data.frame(celltype=.x, class=.y)) %>%
  bind_rows() %>%
  mutate(celltype=factor(celltype, levels=unlist(ct_order)))

for (o in organs){
  p_df <- de_genes %>%
    group_by(gene, organ) %>%
    mutate(n_celltype=n()) %>%
    ungroup() %>%
    filter(organ == o) %>%
    filter(n_celltype > 3) %>%
    # filter(abs(logFoldChange) > 0.1) %>%
    rename(org_expr = o) %>%
    arrange(- org_expr) %>%
    mutate(gene=factor(gene, levels=unique(gene))) %>%
    left_join(ct_class_df) %>%
    mutate(celltype=factor(celltype, levels=unlist(ct_order))) 
  n_genes <- length(unique(p_df$gene))
  pl_height <- ifelse(n_genes > 10, 0.3*n_genes, 1*n_genes)
  p <- p_df %>%
    ggplot(aes(celltype, gene)) +
    facet_grid(organ~class, scales="free_x", space="free") +
    geom_point(aes(fill=org_expr, size=abs(org_expr)), shape=21, color='black') +
    scale_fill_gradient2(high="red", low="blue") +
    scale_color_gradient2(high="red", low="blue") +
    theme_gray(base_size=14) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) 
  print(p) 
}

```
```{r, fig.height=15, fig.width=10}
o = "TH"

p_df <- de_genes %>%
    group_by(gene, organ) %>%
    mutate(n_celltype=n()) %>%
    ungroup() %>%
    filter(organ == o) %>%
    filter(n_celltype > 5) %>%
    # filter(abs(logFoldChange) > 0.1) %>%
    rename(org_expr = o) %>%
    arrange(- org_expr) %>%
    mutate(gene=factor(gene, levels=unique(gene))) %>%
    left_join(ct_class_df) %>%
    mutate(celltype=factor(celltype, levels=unlist(ct_order))) 

n_genes <- length(unique(p_df$gene))

p_top <- n_cells[n_cells$organ==o,] %>%
    mutate(organ=factor(organ, levels=organs)) %>%
    filter(celltype %in% p_df$celltype) %>%
    left_join(ct_class_df) %>%
    mutate(celltype=factor(celltype, levels=unlist(ct_order)))  %>%
    ggplot(aes(celltype, log10(n_cells))) +
      geom_col() +
    theme_bw(base_size=14) +
  facet_grid(organ~class, scales="free_x", space="free") +
  theme(axis.text.x=element_blank()) 

p <- p_df %>%
    ggplot(aes(celltype, gene)) +
    facet_grid(organ~class, scales="free_x", space="free") +
    geom_point(aes(fill=org_expr, size=abs(org_expr)), shape=21, color='black') +
    scale_fill_gradient2(high="red", low="blue") +
    scale_color_gradient2(high="red", low="blue") +
    theme_gray(base_size=14) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) 

(p_top / p)  + plot_layout(heights=c(1,5))
  
  
```

```{r, echo=FALSE, warning=FALSE, message=FALSE}

marker_genes_up <- p_df$gene

em_up <- enricher(marker_genes_up, TERM2GENE=m_t2g, pAdjustMethod = "fdr", 
                  universe = genes$GeneName
                  )

em_res_up <- em_up@result[em_up@result$qvalue < 0.1,] %>%
  dplyr::select(- c(Description))

em_res_up[c('ID',"GeneRatio","geneID")]
# em_res_down[c('ID',"GeneRatio","geneID")]
```

<!-- ```{r, fig.height=10, fig.width=6} -->
<!-- de_genes %>% -->
<!--   filter(organ=="LI") %>% -->
<!--   select(celltype, gene, logFoldChange) %>% -->
<!--   pivot_wider(id_cols=gene, names_from=celltype, values_from="logFoldChange", values_fill=0) %>% -->
<!--   column_to_rownames("gene") %>% -->
<!--   as.matrix() %>% -->
<!--   pheatmap::pheatmap() -->
<!-- ``` -->


```{r, fig.height=12, fig.width=7}
plot_celltype_org_de(de_genes, org = "BM", ct="ELP/PRE_PRO_B", minFC = 0.5)
plot_celltype_org_de(de_genes, org = "LI", ct="ELP/PRE_PRO_B", minFC = 0.5)
```

```{r}
de_genes %>%
  group_by(organ, celltype) %>%
  do(data.frame(Cor=t(cor(.[,organs],use = )), organ2=organs)) %>%
  pivot_longer(cols = str_c('Cor.', organs), names_to="organ1") %>%
  filter(celltype=="ELP/PRE_PRO_B" & organ=="BM") %>%
  ggplot(aes(organ1, organ2)) +
  geom_tile(aes(fill=value)) +
  facet_wrap("organ") +
  scale_fill_gradient2(high="red", low="blue")
  
  
```
```{r, fig.height=12, fig.width=7}
org <- "BM"
# cmat <- 
  de_genes %>%
  # filter(organ %in% org) %>%
  group_by(organ, celltype) %>%
  filter(n() > 20) %>%
    filter(celltype=="PRO_B")
  do(data.frame(Cor=t(cor(.[,organs],use = )), organ2 = organs)) %>%
  pivot_longer(cols = str_c('Cor.', organs), names_to="organ1") %>%
  mutate(organ1=str_remove(organ1, "Cor.")) %>%
  filter(organ2==organ) %>%
  filter(organ1 != org) %>%
  ggplot(aes(organ1, celltype, fill=value)) + 
  geom_tile() +
    facet_grid(organ2~., scales="free", space="free") +
    scale_fill_gradient2(high="red", low="blue")

```
```{r, fig.height=10, fig.width=7}
plot_celltype_org_de(de_genes, org = "TH", ct="Progenitor", minFC = 1)
plot_celltype_org_de(de_genes, org = "TH", ct="DN(P)/DN(early)")
```
```{r, fig.height=10, fig.width=7}
plot_celltype_org_de(de_genes, org = "TH", ct="B1")
plot_celltype_org_de(de_genes, org = "LI", ct="B1")
plot_celltype_org_de(de_genes, org = "SP", ct="B1")
plot_celltype_org_de(de_genes, org = "BM", ct="B1")
```

```{r, fig.height=10, fig.width=7}
plot_celltype_org_de(de_genes, org = "SP", ct="NKT")
plot_celltype_org_de(de_genes, org = "SP", ct="NK")
```

## Visualize results

```{r}
library(patchwork)
## Visualize gene expression for one celltype - organ pair
plot_celltype_org_de <- function(de_genes, ct, org, minFC=0){
  df <- de_genes %>%
  filter(celltype==ct & organ==org) 
  
  if (minFC > 0) {
    df <- filter(df, abs(logFoldChange) > minFC)
  }
  
  p_top <- n_cells[n_cells$celltype==ct,] %>%
    mutate(organ=factor(organ, levels=organs)) %>%
    ggplot(aes(organ, log10(n_cells))) +
      geom_col() +
    ggtitle(paste(ct, org, sep='-')) +
    theme_bw(base_size=14) 
  p_bot <- df %>%
  arrange(logFoldChange) %>%
  mutate(gene = factor(gene, levels=gene)) %>%
  pivot_longer(cols=organs, names_to="organ_expr", values_to="expr") %>%
    mutate(organ_expr=factor(organ_expr, levels=organs)) %>%
  ggplot(aes(organ_expr, gene)) + geom_tile(aes(fill=expr)) +
  scale_fill_gradient2(high = "red", low="blue", name="Avg. Expr.") +
    theme_bw(base_size=14) 
  (p_top / p_bot) + plot_layout(heights=c(1,8))
  
}

## Visualize one gene (where was that gene DE?)
plot_gene_de <- function(de_genes, g){
  de_genes %>%
    filter(gene==g) %>%
    ggplot(aes(organ, celltype, fill=logFoldChange)) +
    geom_tile() +
    scale_fill_gradient2(high = "red", low="blue", name="LogFC") +
    theme_bw(base_size=16) +
    ggtitle(g)
  }

plot_gene_de(de_genes, "NKG7")
plot_gene_de(de_genes, "CD40")

## B1 cells
B1_genes <- c("MSA41", "CD24", "CD40", "IGHM", "IGHD","FCER2")
for (g in B1_genes) { print(plot_gene_de(de_genes, g=g)) }

plot_gene_de(de_genes, "RGS1")
plot_gene_de(de_genes, "IL32")
plot_gene_de(de_genes, "C1QC")
## NK markers
plot_gene_de(de_genes, "KLRB1")
plot_gene_de(de_genes, "CD3G")
plot_gene_de(de_genes, "MS4A1")

## ETP markers
lapply(c('ACY3', 'KCNK17', 'NPTX2'), plot_gene_de, de_genes=de_genes)
```

<!-- ```{r, fig.height=50, fig.width=7} -->
<!-- ys_ls <- lapply(ct_df$celltype, function(ct) plot_celltype_org_de(de_genes, org="YS", ct=ct) + coord_fixed()) -->
<!-- wrap_plots(ys_ls, ncol=1, guides="collect") -->

<!-- de_genes %>% -->
<!--   filter(organ=="YS") %>% -->
<!--   # arrange(logFoldChange) %>% -->
<!--   # mutate(gene = factor(gene, levels=gene)) %>% -->
<!--   pivot_longer(cols=organs, names_to="organ_expr", values_to="expr") %>% -->
<!--   ggplot(aes(organ_expr, gene)) +  -->
<!--   geom_tile(aes(fill=expr)) + -->
<!--   facet_grid(celltype~., space="free", scales="free") + -->
<!--   scale_fill_gradient2(high = "red", low="blue", name="Avg. Expr.") + -->
<!--   theme(strip.text.y = element_text(angle=0)) -->
<!--   # ggtitle(paste(ct, org, sep='-')) -->
<!-- ``` -->

```{r}
de_genes %>%
  filter(celltype=="CD8aa") %>%
  pivot_longer(cols = organs, names_to="expr_organs", values_to="expr") %>%
  group_by(organ, expr_organs) %>%
  summarise(signature=mean(abs(expr))) %>%
  ggplot(aes(expr_organs, signature, color=organ)) + geom_point()
  
```

```{r, fig.height=12, fig.width=13}

plot_celltype_org_de(de_genes, "MATURE_B", "YS") +
  coord_fixed()
plot_celltype_org_de(de_genes, "ILC", "LI", minFC = 0.5) +
  coord_fixed()

plot_celltype_org_de(de_genes, "NKT", "TH", minFC = 0.5) +
  coord_fixed()

plot_celltype_org_de(de_genes, "Progenitor", "TH", minFC = 0.5) +
  coord_fixed()
plot_celltype_org_de(de_genes, "CD4+T", "TH", minFC = 0.5) +
  coord_fixed() +
plot_celltype_org_de(de_genes, "CD8+T", "TH", minFC = 0.5) +
  coord_fixed()
```
```{r, fig.height=10, fig.width=8}
plot_celltype_org_de(de_genes, "CD4+T", "MLN", minFC = 0.3) +
  coord_fixed() +
plot_celltype_org_de(de_genes, "CD8+T", "MLN", minFC = 0.3) +
  coord_fixed() 
  
```
```{r, fig.height=7}
plot_celltype_org_de(de_genes, "CD4+T", "SK", minFC = 0.2) +
  coord_fixed() +
plot_celltype_org_de(de_genes, "CD4+T", "GU", minFC = 0.2) +
  coord_fixed() +
plot_celltype_org_de(de_genes, "CD4+T", "KI", minFC = 0.2) +
  coord_fixed() 
plot_celltype_org_de(de_genes, "CD8+T", "SP", minFC = 0.2) +
  coord_fixed() 

```
```{r}
plot_celltype_org_de(de_genes, "MATURE_B", "TH", minFC = 0.8) +
  coord_fixed()
```


## Read all results

```{r}
library(glue)
load('/nfs/team205/nk5/Team205/ed6/DE/de_celltype.Rbin')
load('/nfs/team205/nk5/Team205/ed6/DE/de_organ.Rbin')

dim(de_organ$beta
```
```{r}
org <- "LI"
ct <- "MATURE_B"

ct_id <- ct_df$ID[ct_df$celltype==ct]

de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
signif_genes <- de_genes$gene[de_genes$organ==org & de_genes$celltype==ct]

data.frame(ct=de_organ$beta[,org], ct_org = de_ct_org$beta[,glue("{ct}:{org}")], gene=rownames(de_celltype$beta)) %>%
  mutate(is_signif = gene %in% signif_genes) %>%
  ggplot(aes(ct, ct_org)) +
  geom_point() +
  geom_point(data= . %>% filter(is_signif), color="red") +
  ggrepel::geom_text_repel(data= . %>% filter(is_signif), color="red", aes(label=gene)) +
  xlab(org) + ylab(glue("{ct}:{org}"))
```


VS marker genes

```{r}
org <- "LI"
ct <- "PRO_B"

plot_coeffs_markers <- function(ct, org){
  ct_id <- ct_df$ID[ct_df$celltype==ct]
  de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
  signif_genes <- de_genes$gene[de_genes$organ==org & de_genes$celltype==ct]
  
  data.frame(ct=de_celltype$beta[,ct], ct_org = de_ct_org$beta[,glue("{ct}:{org}")], gene=rownames(de_celltype$beta)) %>%
    mutate(is_signif = gene %in% signif_genes) %>%
    ggplot(aes(ct, ct_org)) +
    geom_point() +
    geom_point(data= . %>% filter(is_signif), color="red") +
    ggrepel::geom_text_repel(data= . %>% filter(is_signif), color="red", aes(label=gene)) +
    xlab(ct) + ylab(glue("{ct}:{org}"))
  
}

plot_coeffs_markers("NKT", "SP")
plot_coeffs_markers("MATURE_B", "SP")
```

```{r}
org1 <- "BM"
org2 <- "LI"
ct <- "PRO_B"

ct_id <- ct_df$ID[ct_df$celltype==ct]

de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
signif_genes <- de_genes$gene[de_genes$organ==org & de_genes$celltype==ct]

data.frame(ct=de_ct_org$beta[,glue("{ct}:{org1}")], ct_org = de_ct_org$beta[,glue("{ct}:{org2}")], gene=rownames(de_celltype$beta)) %>%
  mutate(is_signif = gene %in% signif_genes) %>%
  ggplot(aes(ct, ct_org)) +
  geom_point() +
  geom_point(data= . %>% filter(is_signif), color="red") +
  geom_text(data= . %>% filter(is_signif), color="red", aes(label=gene)) +
  xlab(glue("{ct}:{org1}")) + ylab(glue("{ct}:{org2}"))
```


```{r}
library(Hmisc)

for (ct in ct_df$celltype){
  ct_id <- ct_df$ID[ct_df$celltype==ct]
  de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
  
  cormat <- rcorr(de_ct_org$beta)  
  cormat$r[cormat$P > 0.01] <- 0
  corrplot::corrplot(cormat$r,method = 'color' , diag = TRUE, addCoef.col = "grey", hclust.method = "ward")
  
}
```
```{r, fig.height=11, fig.width=11}
de_genes %>%
  group_by(organ, celltype) %>%
  summarise(n_de_genes = n()) %>%
  ggplot(aes(organ, n_de_genes)) +
  geom_col() +
  coord_flip() +
  theme_bw(base_size = 14) +
  facet_wrap(celltype~., scales="free_x")
```


### Compare correlation between coefficients and signatures
i.e. cells where the gene set is expressed
```{r}
## Load signatures
sig_mat <- read_csv("/nfs/team205/ed6/data/Fetal_immune/LMM_lymph_signatures_scores.csv") %>%
  column_to_rownames("X1") 

cor_sig <- cor(sig_mat)
```


```{r}
beta_ls <- lapply(ct_df$celltype, function(ct){
  ct_id <- ct_df$ID[ct_df$celltype==ct]
  de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
  de_ct_org$beta
  })

beta_mat <- reduce(beta_ls, cbind) 

beta_df <- data.frame(beta_mat)
colnames(beta_df) <- colnames(beta_mat)
write_csv(beta_df, "/home/jovyan/mount/gdrive/Pan_fetal/significant_genes/LMM_Natsuhiko_results/de_lymphoid_ltsr0.9.beta.csv")

cor_beta <- cor(beta_mat)
```
```{r, fig.height=15, fig.width=15}
library(RColorBrewer)

keep_cols <- de_genes %>%
  group_by(celltype, organ) %>%
  summarise(n=n()) %>%
  filter(n > 30) %>%
  mutate(cols = str_c(celltype,":", organ)) %>%
  pull(cols)

anno_df <- data.frame(names=colnames(cor_beta))  %>%
  mutate(cols=names) %>%
  separate(names, into=c("ct", "organ"), sep=":") %>%
  select(organ, cols) %>%
  column_to_rownames('cols')

palette <- brewer.pal(length(organs), name="Spectral")

pheatmap::pheatmap(cor_beta[keep_cols,keep_cols], annotation_col = anno_df, annotation_row = anno_df, annotation_colors = list(organ=setNames(palette, organs)))
```

```{r}
switch_org_ct <- function(s){
  str_ls <- str_split(s, "_")
  sapply(str_ls, function(x) str_c(x[2], "_", x[1]))
}

long_cor_beta <- melt(cor_beta) %>%
  mutate(Var1 = str_replace(Var1, ":", "_"), Var2 = str_replace(Var2, ":", "_") ) %>%
  mutate(Var1 = switch_org_ct(Var1), Var2 = switch_org_ct(Var2)) %>%
  rename(cor_beta = value)

long_cor_sig <- melt(cor_sig) %>%
  mutate(Var1 = str_remove(Var1, "sig_score_"), Var2 = str_remove(Var2, "sig_score_") ) %>%
  rename(cor_sig = value)

long_cor <- left_join(long_cor_sig, long_cor_beta) 
```
```{r}
long_cor %>%
  filter(Var1!=Var2) %>%
  mutate(label=ifelse((abs(cor_sig) > 0.3) | (abs(cor_beta) > 0.3), str_c(Var1,"_",Var2), NA)) %>%
  mutate(class = case_when(str_remove(Var1, "_.+") == str_remove(Var2, "_.+") ~ "same organ",
                           str_remove(Var1, ".+_") == str_remove(Var2, ".+_") ~ "same cell type")) %>%
  ggplot(aes(cor_sig, cor_beta, color=class)) +
  geom_point(size=1) +
  stat_cor() +
  ggrepel::geom_text_repel(aes(label=label)) +
  xlab("Signature correlation (same cells)") +
  ylab("Beta correlation (same genes)") 
```
```{r, fig.height=12, fig.width=5}
plot_celltype_org_de(de_genes, "Progenitor", "TH", minFC = 1) 
plot_celltype_org_de(de_genes, "PRO_B", "BM") 
```

```{r}
load('/nfs/team205/nk5/Team205/ed6/flag_0.05.Rbin')
genes <- read.csv("/nfs/team205/nk5/Team205/ed6/gene.csv.gz",as.is=T)
write_csv(genes[flag,]["GeneName"], "/home/jovyan/mount/gdrive/Pan_fetal/significant_genes/LMM_Natsuhiko_results/de_lymphoid_ltsr0.9.genes.csv")
```


